## This code conducts analysis for reforestation 
## dynamics in indigenous lands in Brazil
## Authors: Kathryn Baragwanath, Ella Bayi and Nilesh Shinde
## Created:  July 2022
## Last Updated: 20 April 2023

# Begin this project in a fresh environment


# Loading Packages
library(tidyverse)
library(sf)
library(ggplot2)
library(dplyr)
library(readxl)
library(tidyr)
require(maps)
library(readxl)
library(stringr)
library(scales)
library(readr)
library(data.table)
library(hrbrthemes)
library(lattice)
library(viridis)
library(ggthemes)
library(rgdal)
library(rasterVis)
library(RColorBrewer)
library(rdrobust)
library(fixest)
library(did)
library(ggpubr)
library(grid)
library(ggplotify)
library(modelsummary)
library(here)

## Setting seed for replication
set.seed(1234)

## Set working directory to wherever this code is located in your computer
setwd(here::here())

## Loading data
load("00_data/points_final.Rdata")

## Modify code for tables to Latex

tidy.rdrobust <- function(x, ...) {
  ret <- data.frame(
    term = row.names(x$coef),
    estimate = x$coef[, 1],
    std.error = x$se[, 1],
    p.value = x$pv[, 1]
  )
  row.names(ret) <- NULL
  ret
}

glance.rdrobust <- function(x, ...) {
  ret <- data.frame(
    "Mean Control" = x$beta_p_l[1],
    Kernel = x$kernel,
    Bandwidth = x$bwselect,
    N = as.integer(round(x$N_h[1] + x$N_h[2], digits=1)),
    BW = x$bws[1,1]
  )
  ret
}

####################################################
####################################################
##############         RDD          ################
####################################################
####################################################

### 15 years After Homologation 

p <- points %>%   group_by(nearit_FID,year) %>%
  mutate(t_id = max(t)) %>% subset(t_id==1 & potential_ref>=0.027) 

p<-subset(p, year>=Date & (year<= Date +15))

p<-subset(p,  (near_line >= -20000 & near_line <= 20000))
p<-subset(p, near_line<=-1000 | near_line >=1000)
p$near_line<- ifelse(p$near_line<0, p$near_line+999,p$near_line-999)

Y <- p$extent_prop
X <- p$near_line

rdplot_extent_after <-rdplot(Y,X,c=0,  p=1,  nbins=100, h=12000, x.lim = c(-12000,12000), 
                             kernel="tri", y.lim = c(0,35), title = "" ,x.label = "Distance to Border (m)",
                             y.label = "Percent Extent")

rdplot_extent_after<- rdplot_extent_after$rdplot + theme(axis.text=element_text(size=16),
                            axis.title=element_text(size=16))

png("rd_extent.png", width = 4, height = 4, units='in', res=300)
rdplot_extent_after
dev.off()

mod1_extent_after<-rdrobust(Y,X, c=0, p=1, kernel="triangular",vce="nn", cluster = p$nearit_FID)
summary(mod1_extent_after)

## Average age of Secondary Forest 

Y <- p$mean_age
X <- p$near_line

rdplot_age_after <-rdplot(Y,X,c=0,  p=1,  nbins=100, h=12000, x.lim = c(-12000,12000), y.lim = c(0,15) , kernel="tri",  title = "" ,x.label = "Distance to Border (m)", y.label = "Average Age of Secondary Forest")

rdplot_age_after<- rdplot_age_after$rdplot + theme(axis.text=element_text(size=16),
                           axis.title=element_text(size=16))

png("rd_age.png", width = 4, height = 4, units='in', res=300)
rdplot_age_after
dev.off()

mod1_age_after<-rdrobust(Y,X, c=0, p=1, kernel="triangular",vce="nn", cluster = p$nearit_FID)
summary(mod1_age_after)

### 15 years Before Homologation 

p<- subset(points, !is.na(bef_hom_fid))
p<-subset(p, (year<Date & year>=Date-15)  & potential_ref>=0.027)
p<-subset(p,  (near_line >= -20000 & near_line <= 20000))
p<-subset(p, near_line<=-1000 | near_line >=1000)
p$near_line<- ifelse(p$near_line<0, p$near_line+999,p$near_line-999)

#RDD for Silva Junior Secondary Forest Extent
Y <- p$extent_prop
X <- p$near_line

rdplot_extent_bef <-rdplot(Y,X,c=0, nbins=100, p=1, h=12000, x.lim = c(-12000,12000), kernel="tri", y.lim = c(0,35), title = "" ,x.label = "Distance to Border (m)", y.label = "Percent Extent")

rdplot_extent_bef <- rdplot_extent_bef$rdplot + theme(axis.text=element_text(size=16),
        axis.title=element_text(size=16))

png("rd_extent.png", width = 4, height = 4, units='in', res=300)
rdplot_extent_bef
dev.off()

mod1_extent_bef<-rdrobust(Y,X, c=0, p=1, kernel="triangular", vce = "nn", cluster = p$nearit_FID)
summary(mod1_extent_bef)

### Average Age of Secondary Forest 

Y <- p$mean_age
X <- p$near_line

rdplot_age_bef <-rdplot(Y,X,c=0, nbins=100, p=1, h=12000, x.lim = c(-12000,12000), y.lim = c(0,15), kernel="tri",  title = "" ,x.label = "Distance to Border (m)", y.label = "Average Age of Secondary Forest")

rdplot_age_bef <- rdplot_age_bef$rdplot + theme(axis.text=element_text(size=16),
        axis.title=element_text(size=16))

png("rd_age.png", width = 4, height = 4, units='in', res=300)
rdplot_age_bef
dev.off()

mod1_age_bef<-rdrobust(Y,X, c=0, p=1, kernel="triangular", vce = "nn", cluster = p$nearit_FID)
summary(mod1_age_bef)

### Non Homologated

p<-subset(points, !is.na(nhom_fid) & potential_ref>=0.027)
p<-subset(p,  (near_line >= -20000 & near_line <= 20000) )
p<-subset(p, near_line<=-1000 | near_line >=1000)
p$near_line<- ifelse(p$near_line<0, p$near_line+999,p$near_line-999)

#RDD for Silva Junior Secondary Forest Extent

Y <- p$extent_prop
X <- p$near_line

rdplot_extent_nhom <-rdplot(Y,X,c=0, nbins=150, p=1, h=12000, x.lim = c(-12000,12000), kernel="tri", y.lim = c(0,35), title = "" ,x.label = "Distance to Border (m)", y.label = "Percent Extent")

rdplot_extent_nhom <- rdplot_extent_nhom$rdplot + theme(axis.text=element_text(size=16),
        axis.title=element_text(size=16))

png("rd_extent.png", width = 4, height = 4, units='in', res=300)
rdplot_extent_nhom
dev.off()

mod1_extent_nhom<-rdrobust(Y,X, c=0, p=1, kernel="triangular", vce = "nn")
summary(mod1_extent_nhom)

# Average Age of Secondary Forest

Y <- p$mean_age
X <- p$near_line

rdplot_age_nhom <-rdplot(Y,X,c=0, nbins=100, p=1, h=12000, x.lim = c(-12000,12000), y.lim=c(0,15) , kernel="tri",  title = "" ,x.label = "Distance to Border (m)", y.label = "Average Age of Secondary Forest")

rdplot_age_nhom <- rdplot_age_nhom$rdplot + theme(axis.text=element_text(size=16),
        axis.title=element_text(size=16))

png("rd_age.png", width = 4, height = 4, units='in', res=300)
rdplot_age_nhom
dev.off()

mod1_age_nhom<-rdrobust(Y,X, c=0, p=1, kernel="triangular", vce = "nn")
summary(mod1_age_nhom)

## Rdplots 
# Comparing Rdd Plots code

a<-rdplot_extent_nhom
b<-rdplot_extent_bef
c<-rdplot_extent_after

d<-rdplot_age_nhom
e<-rdplot_age_bef
f<-rdplot_age_after


plot<-ggarrange(
  a,b,c,
  nrow = 1, ncol = 3,
  labels = c("Non Hom", "Before", "After")
)
png("rd_comparison_extent_sj.png", width = 10, height = 6, units='in', res=300 )
plot
dev.off()

plot<-ggarrange(
  d,e,f,
  nrow = 1, ncol = 3,
  labels = c("Non Hom", "Before", "After")
)
png("rd_comparison_age_sj.png", width = 10, height = 6, units='in', res=300 )
plot
dev.off()

##### Tables #####
## Extent

models<-list(
  "Non Homologated"= mod1_extent_nhom,
  "Before Homologation" = mod1_extent_bef,
  "After Homologation" = mod1_extent_after
)

modelsummary(models, estimate = "{estimate}{stars}", output = "latex", title = "Secondary Vegetation, Silva Junior")

## From this output we keep only the robust estimates. 

## AGE 

models<-list(
  "Non Homologated"= mod1_age_nhom,
  "Before Homologation" = mod1_age_bef,
  "After Homologation" = mod1_age_after
)

modelsummary(models, estimate = "{estimate}{stars}", output = "latex", title = "Secondary Vegetation Age, Silva Junior")
## From this output we keep only the robust estimates. 


########  COEFPLOTS FOR DIFFERENT OUTCOME VARIABLES #########

###### Extent SJ ######
#After Homologation 

coef<-mod1_extent_after$coef
se<-mod1_extent_after$se

dat1 <- data.frame(coef, se)
dat1$type <- c("After Homologation", "After Homologation", "After Homologation")
dat1$type2<- c("Conventional", "Bias", "Robust")
dat1$val<-2


# Before Homologation 
coef<-mod1_extent_bef$coef
se<-mod1_extent_bef$se

dat2 <- data.frame(coef, se)
dat2$type <- c("Before Homologation", "Before Homologation", "Before Homologation")
dat2$type2<- c("Conventional", "Bias", "Robust")
dat2$val<-4


#Non Homologated
coef<-mod1_extent_nhom$coef
se<-mod1_extent_nhom$se

dat3 <- data.frame(coef, se)
dat3$type <- c("Non Homologated", "Non Homologated", "Non Homologated")
dat3$type2<- c("Conventional", "Bias", "Robust")
dat3$val<-6


dat<-rbind(dat1, dat2,  dat3)

dat<-subset(dat, type2=="Robust")

plot<-data.frame(dat,stringsAsFactors = F)
names(plot)<-c("Estimate","S.E.","type","test", "val")
plot$Estimate<-as.numeric(as.character(plot$Estimate))
plot$S.E.<-as.numeric(as.character(plot$S.E.))

#get upper and lower CI bounds
plot$lower<-plot$Estimate-1.96*plot$S.E.
plot$upper<-plot$Estimate+1.96*plot$S.E.
plot$lower1<-plot$Estimate-1.645*plot$S.E.
plot$upper1<-plot$Estimate+1.645*plot$S.E.
type<-plot$type
plot$type<-factor(plot$type,levels = plot$type[order(plot$val)])
#Adjust sizes and transparencies
plot$main<-c(rep("D",3))
head(plot)

mytheme <- theme(axis.line = element_line(size = 1, colour = "black"),
                 panel.background = element_rect(fill = "white"))

plot1<-ggplot() + #Set horizontal lines
  geom_vline(xintercept = c(0),linetype=1,color="black") +
  geom_point(aes(x=Estimate,y=type,color="grey"),size=2,data=plot) + # Add point estimates
  geom_segment(aes(y = type, x = lower, yend = type, xend = upper,color="grey"),size=0.8,data = plot) + #check if changing to gre works. don't like the look of these graphs that much
  theme(axis.text.y = element_text(size = 18, angle = 0, hjust = 1), axis.text.x = element_text(size = 18)) +
  scale_color_manual(values = c("black", "grey")) + mytheme + # Set colors for bars/points
  theme(text=element_text(size = 18, colour = "Black") )+ #Set how text is displayed
  xlab("SV Extent") + ylab("") +
  theme(legend.position = "none")

###### Age SJ ######
#After Homologation 

coef<-mod1_age_after$coef
se<-mod1_age_after$se

dat1 <- data.frame(coef, se)
dat1$type <- c("After Homologation", "After Homologation", "After Homologation")
dat1$type2<- c("Conventional", "Bias", "Robust")
dat1$val<-2

# Before Homologation 
coef<-mod1_age_bef$coef
se<-mod1_age_bef$se

dat2 <- data.frame(coef, se)
dat2$type <- c("Before Homologation", "Before Homologation", "Before Homologation")
dat2$type2<- c("Conventional", "Bias", "Robust")
dat2$val<-4

#Non Homologated
coef<-mod1_age_nhom$coef
se<-mod1_age_nhom$se

dat3 <- data.frame(coef, se)
dat3$type <- c("Non Homologated", "Non Homologated", "Non Homologated")
dat3$type2<- c("Conventional", "Bias", "Robust")
dat3$val<-6


dat<-rbind(dat1, dat2,  dat3)

dat<-subset(dat, type2=="Robust")

plot<-data.frame(dat,stringsAsFactors = F)
names(plot)<-c("Estimate","S.E.","type","test", "val")
plot$Estimate<-as.numeric(as.character(plot$Estimate))
plot$S.E.<-as.numeric(as.character(plot$S.E.))

#get upper and lower CI bounds
plot$lower<-plot$Estimate-1.96*plot$S.E.
plot$upper<-plot$Estimate+1.96*plot$S.E.
plot$lower1<-plot$Estimate-1.645*plot$S.E.
plot$upper1<-plot$Estimate+1.645*plot$S.E.
type<-plot$type
plot$type<-factor(plot$type,levels = plot$type[order(plot$val)])
#Adjust sizes and transparencies
plot$main<-c(rep("D",3))
head(plot)

mytheme <- theme(axis.line = element_line(size = 1, colour = "black"),
                 panel.background = element_rect(fill = "white"))

plot2<-ggplot() + #Set horizontal lines
  geom_vline(xintercept = c(0),linetype=1,color="black") +
  geom_point(aes(x=Estimate,y=type,color="grey"),size=2,data=plot) + # Add point estimates
  geom_segment(aes(y = type, x = lower, yend = type, xend = upper,color="grey"),size=0.8,data = plot) + #check if changing to gre works. don't like the look of these graphs that much
  theme(axis.text.y = element_text(size = 18, angle = 0, hjust = 1), axis.text.x = element_text(size = 18)) +
  scale_color_manual(values = c("black", "grey")) + mytheme + # Set colors for bars/points
  theme(text=element_text(size = 18, colour = "Black") )+ #Set how text is displayed
  xlab("SV Age") + ylab("") +
  theme(legend.position = "none")


plot<-ggarrange(
  plot1, plot2,
  # labels = c("SV Extent (MB)", "SV Extent (SJ)", "SV Increment (MB)", "SV Increment (SJ)"),
  nrow = 1, ncol = 2
)

png("coefplot_sv_age.png",width = 12, height = 4, units='in', res=300 )
plot
dev.off()


####################################################
####################################################
##############   EVENT STUDY TWFE   ################
####################################################
####################################################

## Including model structure into modelsummary

tidy.AGGTEobj<- function(x, ...){
  ret <- data.frame(
    term = c("ATT"),
    estimate = x[["overall.att"]],
    std.error = x[["overall.se"]]
  )
  ret
}

glance.AGGTEobj <- function(x, ...) {
  ret <- data.frame(
    "Model" = x[["type"]],
    "nobs" = x[["DIDparams"]][["n"]],
    "ntime" =x[["DIDparams"]][["nT"]],
    "vcov.type" = x[["DIDparams"]][["clustervars"]]
    
  )
  ret
}


## Getting data ready 

p<-subset(points,  (near_line >= -20000 & near_line <= 20000))
p<-subset(p, near_line<=-1000 | near_line >=1000)
p$near_line<- ifelse(p$near_line<0, p$near_line+999,p$near_line-999)

p<-as.data.table(p)
p[, time_to_treat := ifelse(treatment==1, year - `Date`, NA)]

p <- p %>%   group_by(nearit_FID) %>%
  fill(time_to_treat) %>%
  fill(time_to_treat, .direction = "up")

## Dropping always treated units from sample. 
p$g<-ifelse(p$treatment==1, p$Date, 0)
table(p$g)

p_sub<-subset(p,g==0 | g>=1989)

did.att.gt.cov <- att_gt(yname="extent_prop",
                        tname="year",
                        idnam="FID_1",
                        gname="g",
                        control_group = "notyettreated",
                        clustervars = "nearit_FID",
                        xformla = ~ near_mines + near_roads + elevation1,
                        data=p_sub)

# summary(did.att.gt)

es_dynamic<-aggte(did.att.gt.cov, type = "dynamic")
summary(es_dynamic)

dynamic.plot.cov<- ggdid(es_dynamic, xgap = 5, title=" ", ) +
  theme(axis.text.y = element_text(size = 18, angle = 0, hjust = 1),
        axis.text.x = element_text(size = 18)) + theme(legend.text=element_text(size=18)) +
  xlim(-10,25) + ylim(-1.5,9) 

dynamic.plot.cov

png("event_did_dynamic.png",width = 10, height = 6, units='in', res=300 )
dynamic.plot.cov
dev.off()

es_simple<-aggte(did.att.gt.cov, type = "simple")
summary(es_simple)

es_group <- aggte(did.att.gt.cov, type = "group")
summary(es_group)

es_calendar <- aggte(did.att.gt.cov, type = "calendar")
summary(es_calendar)

sv_simple<-data.frame(es_simple[["overall.att"]], es_simple[["overall.se"]], es_simple[["type"]],  es_simple[["DIDparams"]][["n"]], es_simple[["DIDparams"]][["nT"]])
colnames(sv_simple)<-c("coeff", "std.error", "type", "n", "periods")

sv_dynamic<-data.frame(es_dynamic[["overall.att"]], es_dynamic[["overall.se"]], es_dynamic[["type"]], es_dynamic[["DIDparams"]][["n"]], es_dynamic[["DIDparams"]][["nT"]])
colnames(sv_dynamic)<-c("coeff", "std.error", "type", "n", "periods")

sv_calendar<-data.frame(es_calendar[["overall.att"]], es_calendar[["overall.se"]], es_calendar[["type"]], es_calendar[["DIDparams"]][["n"]], es_calendar[["DIDparams"]][["nT"]])
colnames(sv_calendar)<-c("coeff", "std.error", "type", "n", "periods")

sv_group<-data.frame(es_group[["overall.att"]], es_group[["overall.se"]], es_group[["type"]], es_group[["DIDparams"]][["n"]], es_group[["DIDparams"]][["nT"]])
colnames(sv_group)<-c("coeff", "std.error", "type", "n", "periods")

sv_res_extent<-rbind( sv_dynamic,sv_simple, sv_calendar, sv_group)
sv_res_extent

### Age of secondary forest
did.att.gt.age <- att_gt(yname="mean_age",
                         tname="year",
                         idnam="FID_1",
                         gname="g",
                         clustervars = "nearit_FID",
                         control_group = "notyettreated",                      
                         data=p_sub)

# summary(did.att.gt)

es_dynamic.cov.age<-aggte(did.att.gt.age, type = "dynamic")
summary(es_dynamic.cov.age)

dynamic.plot.cov.age<- ggdid(es_dynamic.cov.age, xgap = 5, title=" ", ) +
  theme(axis.text.y = element_text(size = 18, angle = 0, hjust = 1), 
        axis.text.x = element_text(size = 18)) + theme(legend.text=element_text(size=18)) +
  xlim(-10,25) + ylim(-1,12)

dynamic.plot.cov.age

png("event_did_dynamic_age.png",width = 10, height = 6, units='in', res=300 )
dynamic.plot.cov.age
dev.off()

## Table 
es_simple_age<-aggte(did.att.gt.age, type = "simple")
summary(es_simple_age)

es_group_age <- aggte(did.att.gt.age, type = "group")
summary(es_group_age)

es_calendar_age <- aggte(did.att.gt.age, type = "calendar")
summary(es_calendar_age)


sv_simple<-data.frame(es_simple_age[["overall.att"]], es_simple_age[["overall.se"]], es_simple_age[["type"]],  es_simple_age[["DIDparams"]][["n"]], es_simple_age[["DIDparams"]][["nT"]])
colnames(sv_simple)<-c("coeff", "std.error", "type", "n", "periods")

sv_dynamic<-data.frame(es_dynamic.cov.age[["overall.att"]], es_dynamic.cov.age[["overall.se"]], es_dynamic.cov.age[["type"]], es_dynamic.cov.age[["DIDparams"]][["n"]], es_dynamic.cov.age[["DIDparams"]][["nT"]])
colnames(sv_dynamic)<-c("coeff", "std.error", "type", "n", "periods")

sv_calendar<-data.frame(es_calendar_age[["overall.att"]], es_calendar_age[["overall.se"]], es_calendar_age[["type"]], es_calendar_age[["DIDparams"]][["n"]], es_calendar_age[["DIDparams"]][["nT"]])
colnames(sv_calendar)<-c("coeff", "std.error", "type", "n", "periods")

sv_group<-data.frame(es_group_age[["overall.att"]], es_group_age[["overall.se"]], es_group_age[["type"]], es_group_age[["DIDparams"]][["n"]], es_group_age[["DIDparams"]][["nT"]])
colnames(sv_group)<-c("coeff", "std.error", "type", "n", "periods")

sv_res<-rbind( sv_dynamic, sv_simple, sv_calendar, sv_group)

sv_res

plot<-ggarrange(
  dynamic.plot.cov , dynamic.plot.cov.age,
  labels = c("A. SV Extent", "B. SV Age"),
  nrow = 1, ncol = 2
)

png("event_did.png",width = 12, height = 6, units='in', res=300 )
plot
dev.off()
